###################################################################################################
###################################      loading library      #####################################
###################################################################################################
# Palette of 14 hex colours used for plotting; the figure code in this script
# draws its lines with cccol[1].
cccol <- c(
  "#CE0013", "#16557A", "#C7A609", "#87C232", "#64C0AB", "#A14C94", "#15A08C",
  "#8B7E75", "#1E7CAF", "#EA425F", "#46489A", "#E50033", "#0F231F", "#1187CD"
)
library(gplots)
library(amap)

SpecificGene <- function(x,given_time,mean_cutoff){
    # Score how specifically one row (gene/repeat) is expressed at a given time point.
    #
    # Args:
    #   x:           named numeric vector of expression values, one per time point.
    #   given_time:  name of the time point to test; matched against names(x).
    #   mean_cutoff: minimum value of mean(x) required for the row to be scored.
    #
    # Returns:
    #   x[given_time] / mean(x) (a length-1 vector that keeps the name given_time)
    #   when mean(x) >= mean_cutoff and given_time is the time point with the
    #   highest value (the first one on ties); NA otherwise.
    avg <- mean(x)
    # isTRUE() turns an NA/NaN comparison (missing values in x, empty x) into a
    # plain "not specific" answer instead of an error inside if().
    if (!isTRUE(avg >= mean_cutoff)) {
        return(NA)
    }
    # order() places NA values last, so the first index is the largest observed value.
    top_time <- names(x)[order(x, decreasing = TRUE)[1]]
    if (!(given_time %in% top_time)) {
        return(NA)
    }
    x[given_time] / avg
}

###################################################################################################
###################################         read data         #####################################
###################################################################################################
# Column positions in development_repeats_fpkm that are averaged for each stage.
Oocyte <- 1:3
Zygote <- 4:6
cell2 <- 7:12
cell4 <- 13:24
cell8 <- 25:43
Morula <- 44:59
MTE <- c(63,66,68,69,71,76:79)
PTE <- c(60:62,64,65,67,70,73,81)
PE <- c(83:89)
EPI <- c(72,74:75,80,82)
hESC0 <- 90:95
hESC10 <- 96:121

# First column of the file supplies the row names; the rest are per-sample values.
development_repeats_fpkm <- read.table("../data/develop_repeats_fpkm.txt", row.names = 1, header = TRUE)

# Stage order used both for the columns of the averaged matrix and for plotting.
develop_path <- c("Oocyte","Zygote","2cell","4cell","8cell","Morula","MTE","PTE","PE","EPI","hESC0","hESC10")
develop_stage_columns <- list(Oocyte, Zygote, cell2, cell4, cell8, Morula, MTE, PTE, PE, EPI, hESC0, hESC10)

# One column of row-wise means per stage, bound together in develop_path order.
developmentRepeatsAverageFpkm <- do.call(
  cbind,
  lapply(develop_stage_columns, function(stage_cols) {
    apply(development_repeats_fpkm[stage_cols], 1, mean)
  })
)
colnames(developmentRepeatsAverageFpkm) <- develop_path
# developmentRepeatsAverageFpkm <- developmentRepeatsAverageFpkm[,develop_path]

# Values scaled by 1000 ("FPKM*1000" on the plot axes).
AmplifyDevelopmentRepeatsAverageFpkm <- developmentRepeatsAverageFpkm * 1e3

# Column positions in naive_repeats_fpkm for each condition.
# n10 is defined here but is not part of the mean/SD matrices built below.
he0 <- 1:2
he2 <- 3:4
he6 <- 5:6
hiF <- 7:8
n10 <- 9
n12 <- 10:11
n14 <- c(12:13,25)
n20 <- c(14:15,26)
n24m <- 16:17
n24p <- 18:19
n8 <- c(20:21,24)
niPS <- 22:23

# First column of the file supplies the row names; the rest are per-sample values.
naive_repeats_fpkm <- read.table("../data/naive_repeats_fpkm.txt", row.names = 1, header = TRUE)

# Condition order shared by the mean and SD matrices.
naive_stage_names <- c("hiF","he0","he2","he6","n8","n12","n14","n20","n24m","n24p","niPS")
naive_stage_columns <- list(hiF, he0, he2, he6, n8, n12, n14, n20, n24m, n24p, niPS)

# Row-wise mean per condition.
naiveRepeatsAverageFpkm <- do.call(
  cbind,
  lapply(naive_stage_columns, function(stage_cols) {
    apply(naive_repeats_fpkm[stage_cols], 1, mean)
  })
)
# Row-wise standard deviation per condition.
naiveRepeatsSD <- do.call(
  cbind,
  lapply(naive_stage_columns, function(stage_cols) {
    apply(naive_repeats_fpkm[stage_cols], 1, sd)
  })
)
colnames(naiveRepeatsAverageFpkm) <- naive_stage_names
colnames(naiveRepeatsSD) <- naive_stage_names

# Both matrices scaled by 1000.
AmplifyNaiveRepeatsAverageFpkm <- naiveRepeatsAverageFpkm * 1e3
AmplifyNaiveRepeatsSD <- naiveRepeatsSD * 1e3

# Column positions in primed_repeats_fpkm for each condition.
p2 <- 1:2
p5 <- 3:4
p8 <- 5:6
p10 <- 7:8
p14 <- 9:10
p20 <- 11:12
p24m <- 13:16
p24p <- 17:18
hiFT <- 19:22
piPS <- 23:26

# First column of the file supplies the row names; the rest are per-sample values.
primed_repeats_fpkm <- read.table("../data/primed_repeats_fpkm.txt", row.names = 1, header = TRUE)

# Condition order shared by the mean and SD matrices.
primed_stage_names <- c("hiFT","p2","p5","p8","p10","p14","p20","p24m","p24p","piPS")
primed_stage_columns <- list(hiFT, p2, p5, p8, p10, p14, p20, p24m, p24p, piPS)

# Row-wise mean per condition.
primedRepeatsAverageFpkm <- do.call(
  cbind,
  lapply(primed_stage_columns, function(stage_cols) {
    apply(primed_repeats_fpkm[stage_cols], 1, mean)
  })
)
# Row-wise standard deviation per condition.
primedRepeatsSD <- do.call(
  cbind,
  lapply(primed_stage_columns, function(stage_cols) {
    apply(primed_repeats_fpkm[stage_cols], 1, sd)
  })
)
colnames(primedRepeatsAverageFpkm) <- primed_stage_names
colnames(primedRepeatsSD) <- primed_stage_names

# Both matrices scaled by 1000.
AmplifyPrimedRepeatsAverageFpkm <- primedRepeatsAverageFpkm * 1e3
AmplifyPrimedRepeatsSD <- primedRepeatsSD * 1e3

# Repeat annotation table, read with read.table() defaults (no header row,
# whitespace-separated). NOTE(review): presumably maps each repeat to its
# class and family -- confirm against the file.
repeats_class_family <- read.table(file = "../data/hg19.repeats.class.family")

###################################################################################################
##############################         specific gene detection       ###############################
###################################################################################################


# 8-cell-specific repeats in development.
# Each row of the x1000-scaled stage averages is scored with SpecificGene();
# the hESC0 and hESC10 columns are left out of the specificity test.
specificity_stages <- c("Oocyte","Zygote","2cell","4cell","8cell","Morula","MTE","PTE","PE","EPI")
develop_eight_cell <- apply(AmplifyDevelopmentRepeatsAverageFpkm[, specificity_stages], 1, SpecificGene, "8cell", 0.1)
# sort() drops the NA scores; head() keeps at most the 100 best, so when fewer
# than 100 rows qualify no NA names are produced (indexing with [1:100] would
# create NA names and make the row lookup below fail).
develop_8cell_gene <- names(head(sort(develop_eight_cell, decreasing = TRUE), 100))
write.table(developmentRepeatsAverageFpkm[develop_8cell_gene,], file = "8c_repeats_in_develop_fpkm.txt",
            quote = FALSE, sep = "\t", col.names = TRUE, row.names = TRUE)

# Time-point-specific repeats in the naive series (scores computed across all naive columns).
naive_n20 <- apply(AmplifyNaiveRepeatsAverageFpkm, 1, SpecificGene, "n20", 0.08)
naive_n24m <- apply(AmplifyNaiveRepeatsAverageFpkm, 1, SpecificGene, "n24m", 0.08)
naive_n24p <- apply(AmplifyNaiveRepeatsAverageFpkm, 1, SpecificGene, "n24p", 0.08)
naive_niPS <- apply(AmplifyNaiveRepeatsAverageFpkm, 1, SpecificGene, "niPS", 0.08)
# Up to 50 best-scoring names per time point; shorter when fewer rows qualify.
naive_n20_gene <- names(head(sort(naive_n20, decreasing = TRUE), 50))
naive_n24m_gene <- names(head(sort(naive_n24m, decreasing = TRUE), 50))
naive_n24p_gene <- names(head(sort(naive_n24p, decreasing = TRUE), 50))
naive_niPS_gene <- names(head(sort(naive_niPS, decreasing = TRUE), 50))
selected_time_genes <- list(naive_n20_gene, naive_n24m_gene, naive_n24p_gene, naive_niPS_gene)
###################################################################################################
###################################           plot            #####################################
###################################################################################################

# Fig3G: mean expression (FPKM * 1000) of the selected 8-cell repeats along
# develop_path, with a grey band showing the 95% t-based confidence interval
# of the mean across those repeats.
# All statistics are computed before the PDF device is opened, so a failed row
# lookup cannot leave an open device and an empty file behind.
develop_8cell_fpkm <- AmplifyDevelopmentRepeatsAverageFpkm[develop_8cell_gene, develop_path]
v1 <- apply(develop_8cell_fpkm, 2, mean)
n <- length(develop_8cell_gene)
# Stored under its own name so that stats::sd is not masked by a numeric vector.
v1_sd <- apply(develop_8cell_fpkm, 2, sd)
alpha <- 0.05
ci_half_width <- v1_sd / sqrt(n) * qt(1 - alpha / 2, n - 1)
v2 <- v1 - ci_half_width  # lower bound
v3 <- v1 + ci_half_width  # upper bound
n_stage <- length(develop_path)

pdf("Fig3G.pdf", width = 5.5, height = 4)
plot(v1, lwd = 3, type = "l", col = cccol[1], main = "8C repeats in development",
     xlab = NA, ylab = "FPKM*1000", xaxt = "n", ylim = c(0, max(v1, v2, v3)))
axis(side = 1, seq_len(n_stage), develop_path, las = 2)
# Band outline: start at the lower bound of stage 1, follow the upper bound
# left to right, then return along the lower bound right to left.
polygon(c(1, seq_len(n_stage), n_stage:2), c(v2[1], v3, v2[n_stage:2]),
        col = adjustcolor("grey", alpha.f = 0.4), border = NA)
dev.off()

# Fig3H: mean expression (FPKM * 1000) of the same 8-cell repeats across the
# naive series, with a grey band showing the 95% t-based confidence interval
# of the mean across those repeats.
# Columns are taken with n24p before n24m, matching the "24d+dox" / "24d-dox"
# axis labels below.
# The statistics are computed before the PDF device is opened: the row lookup
# fails if a selected repeat is absent from the naive table, and that must not
# leave an open device and an empty file behind.
nData <- AmplifyNaiveRepeatsAverageFpkm[develop_8cell_gene, c("hiF","he0","he2","he6","n8","n12","n14","n20","n24p","n24m","niPS")]
xmax <- ncol(nData)
nMean <- apply(nData, 2, mean)
nSd <- apply(nData, 2, sd)
n <- length(develop_8cell_gene)
nv1 <- nMean
alpha <- 0.05
nHalfWidth <- nSd / sqrt(n) * qt(1 - alpha / 2, n - 1)
nv2 <- nMean - nHalfWidth  # lower bound
nv3 <- nMean + nHalfWidth  # upper bound

pdf("Fig3H.pdf", width = 5.5, height = 4)
plot(nv1, lwd = 3, type = "l", col = cccol[1], ylim = c(0, max(nv1, nv2, nv3)), xlim = c(1, xmax),
     main = "8C repeats in reprogramming", ylab = "FPKM*1000", xlab = "", xaxt = "n")
axis(1, at = seq_len(xmax), labels = c("hiF-T","0d","2d","6d","8d","12d","14d","20d","24d+dox","24d-dox","niPSC-T"), las = 2)
# Band outline: start at the lower bound of the first column, follow the upper
# bound left to right, then return along the lower bound right to left.
polygon(c(1, seq_len(xmax), xmax:2), c(nv2[1], nv3, nv2[xmax:2]),
        col = adjustcolor("grey", alpha.f = 0.4), border = NA)
dev.off()